clear all

* Root folder of the replication package; every other path is derived from it.
global path "~/Dropbox/IT_Revolution/Replication_package/JPE submission"

* Sub-folders for raw inputs, cleaned inputs, and generated figures.
global path_rawdata   "${path}/Raw_data"
global path_cleandata "${path}/Clean_data"
global path_output    "${path}/Output"

* Create the output folder; capture swallows the error raised when it already exists.
capture mkdir "${path_output}"

***Step 1: Prepare the data
********************************************************************************

*1.1: Manufacturing (1900-1940)

* For each percentile cutoff p, build the file distribution_d_pre`p' holding the
* rows of temp_distance_all_early.dta where the row's own exposure is at or below
* the p-th percentile (O_L) and the "_2" exposure is above it (O_H). This is done
* once per exposure measure (exp_elec, exposure_m); the two samples are stacked
* and tagged by the string variable `exposure'.
* NOTE(review): the "_2" variables presumably describe the paired occupation -
* confirm against the script that builds temp_distance_all_early.dta.
foreach p in 75 50 {

	use "$path_cleandata/temp_distance_all_early.dta", clear

	* The percentile is copied into a temporary scalar right after -summarize-,
	* so it cannot be lost if a later edit inserts another r-class command
	* between -summarize- and the comparisons that use it.
	tempname cut

	* --- Sample based on exp_elec (built on a preserved copy of the data) ---
	preserve

		summarize exp_elec, detail
		scalar `cut' = r(p`p')

		* A missing exp_elec evaluates to false here, so O_L is 0 for it.
		gen O_L = exp_elec <= `cut'

		* Stata treats missing as larger than any number, so "x > cutoff" is
		* true when x is missing; exclude missings explicitly so they are not
		* classified as highly exposed.
		gen O_H = exp_elec_2 > `cut' & !missing(exp_elec_2)

		keep if O_L==1 & O_H==1

		* Unweighted mean of d_ent within each occupation of the kept sample.
		bys occ1990dd: egen m_d_ent_uw=mean(d_ent)

		gen exposure="exp_elec"

		save "$path_cleandata/distribution_d_pre`p'", replace

	restore

	* --- Sample based on exposure_m (same steps on the restored full data) ---
	summarize exposure_m, detail
	scalar `cut' = r(p`p')

	gen O_L = exposure_m <= `cut'
	gen O_H = exposure_m_2 > `cut' & !missing(exposure_m_2)

	keep if O_L==1 & O_H==1
	bys occ1990dd: egen m_d_ent_uw=mean(d_ent)

	gen exposure="exposure_m"

	* Stack the exp_elec sample underneath and overwrite the file with both.
	append using "$path_cleandata/distribution_d_pre`p'"

	save "$path_cleandata/distribution_d_pre`p'", replace
}

*1.2: ICT (1980-2018)

* For each percentile cutoff p, build the file distribution_d_post`p' holding the
* rows of temp_distance_all_later.dta where the row's own exposure is at or below
* the p-th percentile (O_L) and the "_2" exposure is above it (O_H). This is done
* once per exposure measure (exposure_ict, exposure_a); the two samples are
* stacked and tagged by the string variable `exposure'.
* NOTE(review): the "_2" variables presumably describe the paired occupation -
* confirm against the script that builds temp_distance_all_later.dta.
foreach p in 75 50 {

	use "$path_cleandata/temp_distance_all_later.dta", clear

	* The percentile is copied into a temporary scalar right after -summarize-,
	* so it cannot be lost if a later edit inserts another r-class command
	* between -summarize- and the comparisons that use it.
	tempname cut

	* --- Sample based on exposure_ict (built on a preserved copy of the data) ---
	preserve

		summarize exposure_ict, detail
		scalar `cut' = r(p`p')

		* A missing exposure_ict evaluates to false here, so O_L is 0 for it.
		gen O_L = exposure_ict <= `cut'

		* Stata treats missing as larger than any number, so "x > cutoff" is
		* true when x is missing; exclude missings explicitly so they are not
		* classified as highly exposed.
		gen O_H = exposure_ict_2 > `cut' & !missing(exposure_ict_2)

		keep if O_L==1 & O_H==1

		* Unweighted mean of d_ent within each occupation of the kept sample.
		bys occ1990dd: egen m_d_ent_uw=mean(d_ent)

		gen exposure="exposure_ict"

		save "$path_cleandata/distribution_d_post`p'", replace

	restore

	* --- Sample based on exposure_a (same steps on the restored full data) ---
	summarize exposure_a, detail
	scalar `cut' = r(p`p')

	gen O_L = exposure_a <= `cut'
	gen O_H = exposure_a_2 > `cut' & !missing(exposure_a_2)

	keep if O_L==1 & O_H==1
	bys occ1990dd: egen m_d_ent_uw=mean(d_ent)

	gen exposure="exposure_a"

	* Stack the exposure_ict sample underneath and overwrite the file with both.
	append using "$path_cleandata/distribution_d_post`p'"

	save "$path_cleandata/distribution_d_post`p'", replace
}

*Step 2: Produce the graphs
*******************************************************************************
* Stack the 75th-percentile samples: rows from the "post" file get period 2,
* rows appended from the "pre" file get period 1.
use "$path_cleandata/distribution_d_post75", clear
generate period = 2
append using "$path_cleandata/distribution_d_pre75"
replace period = 1 if missing(period)
save "$path_cleandata/temp_skilldifference_all75", replace

*2.1: Figure 2

* Weight variable: emp1980_2 where it is not ".", otherwise emp1900_2.
generate emp_2 = cond(emp1980_2==., emp1900_2, emp1980_2)

* One row per occupation x period x exposure measure: the precomputed unweighted
* mean (first non-missing value) and the emp_2-weighted mean of d_ent.
collapse (firstnm) m_d_ent_uw (mean) m_d_ent_w=d_ent [aw=emp_2], ///
	by(occ1990dd period exposure)

* Overlay the period-1 (exposure_m) and period-2 (exposure_ict) histograms.
twoway ///
	(histogram m_d_ent_w if period==1 & exposure=="exposure_m", ///
		fcolor(gray) lcolor(none) width(0.05) fraction) ///
	(histogram m_d_ent_w if period==2 & exposure=="exposure_ict", ///
		fcolor(none) lcolor(black) width(0.05) fraction), ///
	scheme(s1color) ///
	legend(order( - "Occupations exposure to:" 2   1)  label( 1 "Manufacturing") label(2 "ICT") row(1) ) ///
	xtitle("Weighted average distance from Highly Exposed Occupations", size(medsmall)) ///
	ytitle("Fraction of Least Exposed Occupations", size(medsmall))

* Write the figure in both formats.
graph export "$path_output/Fig2_hist_ent_wavg.png", as(png) replace
graph export "$path_output/Fig2_hist_ent_wavg.eps", as(eps) replace

* Two-sample Kolmogorov-Smirnov test across periods for the plotted measures.
ksmirnov m_d_ent_w if exposure == "exposure_m" | exposure == "exposure_ict", by(period)

*2.2: Figure A5
use "$path_cleandata/temp_skilldifference_all75", clear

* Panel B: histogram of the row-level distance d_ent (no averaging).
twoway ///
	(histogram d_ent if period==1 & exposure=="exposure_m", ///
		fcolor(gray) lcolor(none) width(0.05) fraction) ///
	(histogram d_ent if period==2 & exposure=="exposure_ict", ///
		fcolor(none) lcolor(black) width(0.05) fraction), ///
	scheme(s1color) ///
	legend(order( - "Occupations exposure to:" 2   1)  label( 1 "Manufacturing") label(2 "ICT") row(1) ) ///
	xtitle("Distance from Highly Exposed Occupations", size(medsmall)) ///
	ytitle("Fraction of Least Exposed Occupations", size(medsmall))
graph export "$path_output/FigA5_B_bilateral_dist.png", as(png) replace
graph export "$path_output/FigA5_B_bilateral_dist.eps", as(eps) replace
ksmirnov d_ent if exposure == "exposure_m" | exposure == "exposure_ict", by(period)

* Weight variable: emp1980_2 where it is not ".", otherwise emp1900_2.
generate emp_2 = cond(emp1980_2==., emp1900_2, emp1980_2)

* One row per occupation x period x exposure measure: the precomputed unweighted
* mean (first non-missing value) and the emp_2-weighted mean of d_ent.
collapse (firstnm) m_d_ent_uw (mean) m_d_ent_w=d_ent [aw=emp_2], ///
	by(occ1990dd period exposure)

* Panel C: weighted means for the alternative measures (exp_elec, exposure_a).
twoway ///
	(histogram m_d_ent_w if period==1 & exposure=="exp_elec", ///
		fcolor(gray) lcolor(none) width(0.05) fraction) ///
	(histogram m_d_ent_w if period==2 & exposure=="exposure_a", ///
		fcolor(none) lcolor(black) width(0.05) fraction), ///
	scheme(s1color) ///
	legend(order( - "Occupations exposure to:" 2   1)  label( 1 "Manufacturing") label(2 "ICT") row(1) ) ///
	xtitle("Weighted average distance from Highly Exposed Occupations", size(medsmall)) ///
	ytitle("Fraction of Least Exposed Occupations", size(medsmall))
graph export "$path_output/FigA5_C_alt_exp.png", as(png) replace
graph export "$path_output/FigA5_C_alt_exp.eps", as(eps) replace
ksmirnov m_d_ent_w if exposure == "exp_elec" | exposure == "exposure_a", by(period)

* Panel D: unweighted means for the baseline measures (exposure_m, exposure_ict).
twoway ///
	(histogram m_d_ent_uw if period==1 & exposure=="exposure_m", ///
		fcolor(gray) lcolor(none) width(0.05) fraction) ///
	(histogram m_d_ent_uw if period==2 & exposure=="exposure_ict", ///
		fcolor(none) lcolor(black) width(0.05) fraction), ///
	scheme(s1color) ///
	legend(order( - "Occupations exposure to:" 2	 1)  label( 1 "Manufacturing") label(2 "ICT") row(1) ) ///
	xtitle("Average distance from Highly Exposed Occupations", size(medsmall)) ///
	ytitle("Fraction of Least Exposed Occupations", size(medsmall))
graph export "$path_output/FigA5_D_uwavg.png", as(png) replace
graph export "$path_output/FigA5_D_uwavg.eps", as(eps) replace
ksmirnov m_d_ent_uw if exposure == "exposure_m" | exposure == "exposure_ict", by(period)

* Panel A (50% cutoff)
* Stack the 50th-percentile samples: rows from the "post" file get period 2,
* rows appended from the "pre" file get period 1.
use "$path_cleandata/distribution_d_post50", clear
generate period = 2
append using "$path_cleandata/distribution_d_pre50"
replace period = 1 if missing(period)
save "$path_cleandata/temp_skilldifference_all50", replace

* Weight variable: emp1980_2 where it is not ".", otherwise emp1900_2.
generate emp_2 = cond(emp1980_2==., emp1900_2, emp1980_2)

* One row per occupation x period x exposure measure: the precomputed unweighted
* mean (first non-missing value) and the emp_2-weighted mean of d_ent.
collapse (firstnm) m_d_ent_uw (mean) m_d_ent_w=d_ent [aw=emp_2], ///
	by(occ1990dd period exposure)

* Same overlay as the weighted-average figure, on the 50th-percentile samples.
twoway ///
	(histogram m_d_ent_w if period==1 & exposure=="exposure_m", ///
		fcolor(gray) lcolor(none) width(0.05) fraction) ///
	(histogram m_d_ent_w if period==2 & exposure=="exposure_ict", ///
		fcolor(none) lcolor(black) width(0.05) fraction), ///
	scheme(s1color) ///
	legend(order( - "Occupations exposure to:" 2   1)  label( 1 "Manufacturing") label(2 "ICT") row(1) ) ///
	xtitle("Weighted average distance from Highly Exposed Occupations", size(medsmall)) ///
	ytitle("Fraction of Least Exposed Occupations", size(medsmall))
graph export "$path_output/FigA5_A_alt_cutoff.png", as(png) replace
graph export "$path_output/FigA5_A_alt_cutoff.eps", as(eps) replace
ksmirnov m_d_ent_w if exposure == "exposure_m" | exposure == "exposure_ict", by(period)


* Remove the intermediate .dta files written to the clean-data folder above
* (same files, same order as they were created for erasure originally).
foreach stub in distribution_d_pre50 distribution_d_pre75 ///
		distribution_d_post50 distribution_d_post75 ///
		temp_skilldifference_all75 temp_skilldifference_all50 {
	erase "$path_cleandata/`stub'.dta"
}
